***** globals *****
* Turn off output paging so the do-file runs without waiting for a keypress.
set more off

* Root folder, plus the data and results folders derived from it.
global projectdir "~"
global datadir    "$projectdir/data"
global results    "$projectdir/results"

* Load the analysis dataset (replacing anything in memory) and declare the
* panel structure: emp_num is the panel identifier, year the time variable.
use "$datadir/jobhist_lehd_E_06102021.dta", clear
tsset emp_num year



* NOTE(review): original reminder read "add num jobs once I've run that,
* max_wage". stats_vars below already lists max_wage and the *_num_jobs
* variables, so this reminder may be stale -- confirm and remove.

*******SETUP*******
* Variable lists shared by the summary-statistics sections that follow.
* They are local macros, so they exist only while this do-file is running.

* Outcome variables summarised in the means/SD and quasimedian tables.
local depvars       count_ht_mode_start_2012 count_start young count_ht_mode_young_2012 alt_univ_ein_max R1 log_wage ht_overage
* Variables that are recoded (0 -> missing) and mean-filled before being summarised.
local stats_vars    max_wage w2_num_jobs lehd_num_jobs
* Occupation variables; the by-occupation section filters on each one being equal to 1.
local occup         occup_faculty occup_gradpostdoc occup_undergradstudent occup_other_staff
* Field variables, summarised alongside the other lists.
local field         field_science field_engin field_bioMedPharma field_other	

* Number of things
* Distinct-value counts for the main identifiers.
* NOTE(review): -unique- is not a built-in command; presumably the
* user-written package is installed -- confirm.
  * univ
unique storgabbr
  * people
unique pik
* Stata treats missing as larger than any number, so every ">0" test below
* is paired with !missing() on the same variable to keep missing shares out.
unique pik if share_federal>0 & !missing(share_federal)
unique pik if share_private>0 & !missing(share_private)
unique pik if share_federal>0 & share_private>0 & !missing(share_federal) & !missing(share_private)

unique pinumber_infill

  * same counts restricted to the analysis sample
unique pik if in_sample==1
unique pinumber_infill if in_sample==1

* Constant-only regression clustered on pik: run only to obtain e(N_clust),
* the number of pik clusters among observations with non-missing
* ht_mode_start_2012.
reg ht_mode_start_2012, cluster(pik)
local pik_count = e(N_clust)
* Store the count as a constant variable, plus a copy rounded to the nearest 1000.
gen pik_count = `pik_count'
gen pik_count_r = round(pik_count, 1000)

* 1 Means & standard deviations
* Table 1a: mean and SD of every listed variable within the analysis sample.
* This call creates (replace) the CSV that later sections append to.
est clear
estpost sum `depvars' `stats_vars' `occup' `field' if in_sample==1

esttab using "$results/raw_summstats_07122021.csv", replace cells("mean(fmt(%9.4g)) sd(fmt(%9.4g))") noobs

* Table 1b: for each stats variable,
*   - recode zeros to missing IN PLACE (this also affects later sections),
*   - copy it to <var>_for_stats,
*   - fill the copy's missing values with the in-sample mean of the
*     non-missing values.
foreach var of varlist `stats_vars' {
    replace `var' = . if `var' == 0
    gen `var'_for_stats = `var'
    sum `var' if in_sample==1
    replace `var'_for_stats = r(mean) if missing(`var')
}
* NOTE(review): unlike Table 1a, this summary is not restricted to
* in_sample==1 -- confirm that is intended.
estpost sum *_for_stats

esttab using "$results/raw_summstats_07122021.csv", append cells("mean(fmt(%9.4g)) sd(fmt(%9.4g))") noobs


* 2 Quasimedians
* For each variable, qmed_<name> holds the mean of the variable over in-sample
* observations whose value lies between the in-sample 49th and 51st
* percentiles (inclusive). The value is constant across observations.
* Names are cut to 22 characters so that the temporary for_qmed_ prefix still
* fits within Stata's 32-character variable-name limit.

* Percentile bounds are kept in scalars rather than local macros: a macro
* stores a rounded decimal string, which can make the >= / <= comparison miss
* values that sit exactly on a bound.
tempname band_lo band_hi

foreach var of varlist `depvars' `occup' `stats_vars' `field' {
    _pctile `var' if in_sample==1, p(49,51)
    return list
    scalar `band_lo' = r(r1)
    scalar `band_hi' = r(r2)
    local short_var = substr("`var'", 1, 22)
    gen for_qmed_`short_var' = 1 if `var' >= scalar(`band_lo') & `var' <= scalar(`band_hi')
    sum `var' if for_qmed_`short_var' == 1 & in_sample==1
    * r(mean) is used as an expression, so an empty band gives a missing
    * quasimedian instead of a syntax error from an empty macro expansion.
    gen qmed_`short_var' = r(mean)
    drop for_qmed_`short_var'
}

* Echo the variable order so the appended rows can be matched to variables.
display ""
display "`depvars' `occup' `stats_vars' `field'"
estpost sum qmed_*

esttab using "$results/raw_summstats_07122021.csv", append cells("mean(fmt(%9.4g))") noobs


* 3 By Occupation and Field
* Mean and SD of count_ht_mode_start_2012 among observations where each
* occupation variable equals 1, appended to the CSV with the variable name
* as the table title.
* NOTE(review): only the occupation list is looped over in this block, and
* the summaries are not restricted to in_sample==1 -- confirm both are intended.
foreach var of varlist `occup' {
    estpost sum count_ht_mode_start_2012 if `var'==1
    esttab using "$results/raw_summstats_07122021.csv", append cells("mean(fmt(%9.4g)) sd(fmt(%9.4g))") noobs title("`var'")
}
